On cherche à étudier l’effet de trois facteurs sur le transcriptome des racines d’Arabidopsis. On cherche ici à utiliser et comparer différents packages de multiDE sur nos données.
data <- read.csv("quantifFiles/quantifGenes.csv", h = T, sep = ",")
rownames(data) <- data$Gene
genes = which(!(grepl("__", rownames(data))))
not_quant = data[which((grepl("__", rownames(data)))), ]
data = data[genes, grepl("R", colnames(data))]
head(data) R3 R2 R1 R5 R4 R6 R7 R11 R10 R12 R13 R17 R16 R14
AT1G01010 1526 1006 1116 1275 967 1018 854 1132 1294 1364 2325 2113 2193 2564
AT1G01020 416 285 289 349 364 370 226 513 502 561 461 407 432 614
AT1G01030 31 15 19 29 36 28 12 47 34 47 18 40 32 44
R15 R18 R24 R19 R22 R23 R21 R20 R9 R8
AT1G01010 2364 2074 1987 2027 1754 1697 1537 1898 816 912
AT1G01020 380 502 484 467 426 415 413 462 223 312
AT1G01030 37 27 42 39 36 40 37 37 15 19
[ reached 'max' / getOption("max.print") -- omitted 3 rows ]
[1] 28775 24
annot <- read.csv("Code_for_RNAseq_CO2_N_Fr.csv", h = T, sep = ";")
conditions <- as.vector(unique(annot$Sample))
annot$ID <- paste0("R", annot$Code)
annot$condition <- substr(conditions, 1, nchar(conditions) - 1)
cond <- unique(substr(conditions, 1, nchar(conditions) - 1))
cond <- cond[grepl("At", cond)]
getCondition <- function(id) {
return(annot[annot$ID == id, "condition"])
}
getExactCondition <- function(id) {
return(annot[annot$ID == id, "Sample"])
}
getLabel <- function(id) {
text <- as.vector(annot[annot$ID == id, "Sample"])
res <- ""
nb <- substr(text, nchar(text), nchar(text))
if (grepl("Ambient", text)) {
res = paste0(res, "c")
} else {
res = paste0(res, "C")
}
if (grepl("High", text)) {
res = paste0(res, "N")
} else {
res = paste0(res, "n")
}
if (grepl("Starv", text)) {
res = paste0(res, "f")
} else {
res = paste0(res, "F")
}
res = paste0(res, "_", nb)
return(res)
}
getLabel("R6")[1] "cnF_3"
[1] At_AmbientCO2_LowNitrate_Fe1
48 Levels: At_AmbientCO2_HighNitrate_Fe1 ... Sl_ElevatedCO2_LowNitrate_FeStarvation3
[1] "At_AmbientCO2_LowNitrate_Fe"
TCC is an R package that provides a series of functions for differential expression analysis of tag count data. The package incorporates multi-step normalization methods, whose strategy is to remove potential DEGs before performing the data normalization. The normalization function based on this DEG elimination strategy (DEGES) includes (i) the original TbT method based on DEGES for two-group data with or without replicates, (ii) much faster methods for two-group data with or without replicates, and (iii) methods for multi-group comparison. TCC provides a simple unified interface to perform such analyses with combinations of functions provided by edgeR, DESeq, and baySeq.
On crée l’objet TCC avec le design souhaité, et on filtre les gènes avec de faibles expressions (paramètre low.count).
Lors de la normalisation (DEGES,iedgeR), on fait un premier calcul des gènes DE, pour pouvoir les enlever lors de la normalisation. Le maramètre test.method permet de choisir la manière de détecter les genes DE (edgeR, DEsqe2, ou tBt (très long)). On peut répéter cette procédure jusqu’à la convergence des facteurs de taille des librairies, d’ou le i.
# design
keep <- rowSums(data) >= 10
data <- data[keep, ]
group <- sapply(colnames(data), getCondition)
colnames(data) <- sapply(colnames(data), getLabel)
tcc <- new("TCC", data, group)
tccCount:
cNF_3 cNF_2 cNF_1 cnF_2 cnF_1 cnF_3 CNF_1 CnF_2 CnF_1 CnF_3 cNf_1
AT1G01010 1526 1006 1116 1275 967 1018 854 1132 1294 1364 2325
AT1G01020 416 285 289 349 364 370 226 513 502 561 461
AT1G01030 31 15 19 29 36 28 12 47 34 47 18
cnf_2 cnf_1 cNf_2 cNf_3 cnf_3 Cnf_3 CNf_1 Cnf_1 Cnf_2 CNf_3 CNf_2
AT1G01010 2113 2193 2564 2364 2074 1987 2027 1754 1697 1537 1898
AT1G01020 407 432 614 380 502 484 467 426 415 413 462
AT1G01030 40 32 44 37 27 42 39 36 40 37 37
CNF_3 CNF_2
AT1G01010 816 912
AT1G01020 223 312
AT1G01030 15 19
[ reached getOption("max.print") -- omitted 3 rows ]
Sample:
group norm.factors lib.sizes
cNF_3 At_AmbientCO2_HighNitrate_Fe 1 32032772
cNF_2 At_AmbientCO2_HighNitrate_Fe 1 25708325
cNF_1 At_AmbientCO2_HighNitrate_Fe 1 25388926
cnF_2 At_AmbientCO2_LowNitrate_Fe 1 29410725
cnF_1 At_AmbientCO2_LowNitrate_Fe 1 26720600
cnF_3 At_AmbientCO2_LowNitrate_Fe 1 26429519
CNF_1 At_ElevatedCO2_HighNitrate_Fe 1 16944203
CnF_2 At_ElevatedCO2_LowNitrate_Fe 1 28518287
CnF_1 At_ElevatedCO2_LowNitrate_Fe 1 30134658
CnF_3 At_ElevatedCO2_LowNitrate_Fe 1 30939161
cNf_1 At_AmbientCO2_HighNitrate_FeStarvation 1 30097179
cnf_2 At_AmbientCO2_LowNitrate_FeStarvation 1 29411029
cnf_1 At_AmbientCO2_LowNitrate_FeStarvation 1 32417299
cNf_2 At_AmbientCO2_HighNitrate_FeStarvation 1 34497190
cNf_3 At_AmbientCO2_HighNitrate_FeStarvation 1 31947050
cnf_3 At_AmbientCO2_LowNitrate_FeStarvation 1 30166724
Cnf_3 At_ElevatedCO2_LowNitrate_FeStarvation 1 33780095
CNf_1 At_ElevatedCO2_HighNitrate_FeStarvation 1 30427501
Cnf_1 At_ElevatedCO2_LowNitrate_FeStarvation 1 29577130
Cnf_2 At_ElevatedCO2_LowNitrate_FeStarvation 1 30405383
CNf_3 At_ElevatedCO2_HighNitrate_FeStarvation 1 25049240
CNf_2 At_ElevatedCO2_HighNitrate_FeStarvation 1 28496520
CNF_3 At_ElevatedCO2_HighNitrate_Fe 1 18823627
CNF_2 At_ElevatedCO2_HighNitrate_Fe 1 20700711
tcc <- filterLowCountGenes(tcc, low.count = 10)
# Normalisation
tcc <- calcNormFactors(tcc, norm.method = "tmm", test.method = "edger", iteration = 1,
FDR = 0.1, floorPDEG = 0.05)
tcc$norm.factors cNF_3 cNF_2 cNF_1 cnF_2 cnF_1 cnF_3 CNF_1 CnF_2
0.9662816 0.9730845 0.8976118 0.9973121 1.0475825 1.0072335 0.9752235 1.0522596
CnF_1 CnF_3 cNf_1 cnf_2 cnf_1 cNf_2 cNf_3 cnf_3
1.0186883 1.0510472 1.0019289 0.9977532 1.0188031 1.0400636 0.9948079 1.0087956
Cnf_3 CNf_1 Cnf_1 Cnf_2 CNf_3 CNf_2 CNF_3 CNF_2
0.9381444 0.9961312 1.0067383 1.0059904 1.0542699 1.0349814 0.9249584 0.9903091
user system elapsed
10.658 0.747 11.445
s <- sample(rownames(tcc$count), size = 200)
heatmap(as.matrix(tcc$count[s, ]), main = "Before normalisation")normalized.count <- getNormalizedData(tcc)
heatmap(as.matrix(normalized.count[s, ]), main = "After normalisation")# DEtest
tcc <- estimateDE(tcc, test.method = "edger", FDR = 0.001)
result <- getResult(tcc, sort = TRUE)
DEgenes <- subset(result, estimatedDEG == 1)
print(paste(dim(DEgenes)[1], " genes DE"))[1] "13655 genes DE"
gene_id a.value m.value p.value q.value rank estimatedDEG
1 AT1G02850 NA NA 0 0 45 1
2 AT1G03870 NA NA 0 0 45 1
3 AT1G05660 NA NA 0 0 45 1
4 AT1G06120 NA NA 0 0 45 1
5 AT1G09932 NA NA 0 0 45 1
6 AT1G12030 NA NA 0 0 45 1
plot(tcc, group = c("At_AmbientCO2_HighNitrate_Fe", "At_ElevatedCO2_HighNitrate_Fe"),
main = "Effet CO2 en conditions normales", ylim = c(-11, 11))plot(tcc, group = c("At_AmbientCO2_HighNitrate_FeStarvation", "At_ElevatedCO2_HighNitrate_FeStarvation"),
main = "Effet CO2 en Fe starvation", ylim = c(-11, 11))plot(tcc, group = c("At_AmbientCO2_LowNitrate_Fe", "At_ElevatedCO2_LowNitrate_Fe"),
main = "Effet CO2 en low Nitrate", ylim = c(-11, 11))plot(tcc, group = c("At_AmbientCO2_HighNitrate_Fe", "At_AmbientCO2_HighNitrate_FeStarvation"),
main = "Effet Fe en conditions normales", ylim = c(-11, 11))plot(tcc, group = c("At_AmbientCO2_LowNitrate_FeStarvation", "At_AmbientCO2_HighNitrate_FeStarvation"),
main = "Effet Nitrate en Fe starvation", ylim = c(-11, 11))# ggplot(melted_res, aes(Var2, Var1, fill= log(value))) + geom_tile() +
# scale_fill_distiller(palette = 'RdPu') + theme(axis.text.x = element_text(size
# = 0.5, angle = 320, hjust = 0, colour = 'grey50')) library(viridis) heatmap <-
# ggplot(melted_res, aes(Var2, Var1, fill= log(value))) + geom_raster() +
# scale_fill_viridis(trans='sqrt') + theme(axis.text.x=element_text(angle=65,
# hjust=1), axis.text.y=element_blank(), axis.ticks.y=element_blank()) heatmapFonction pour visualiser un gène étant donné son identifiant, en terme d’expression normalisée :
getExpression <- function(gene, conds = "all") {
print(conds)
if (length(conds) == 1) {
conds = colnames(normalized.count)
} else {
conds = grepl(conds[1], colnames(normalized.count)) | grepl(conds[2], colnames(normalized.count))
}
df <- normalized.count[gene, conds]
library(reshape2)
d <- melt(df)
d$group = str_split_fixed(rownames(d), "_", 2)[, 1]
p <- ggplot(data = d, aes(x = group, y = value, fill = group)) + geom_dotplot(binaxis = "y",
stackdir = "center") + theme(axis.text.x = element_text(angle = 320, hjust = 0,
colour = "grey50"), plot.title = element_text(size = 14, face = "bold")) +
ggtitle(paste("Normalized expression for ", gene))
p
}
getExpression("AT1G62510")[1] "all"
[1] "cNF" "CNF"
[1] "all"
Il semble que l’effet du fer soit le plus fort, et celui qui amplifie les autres effets. La technique utilisée ici identifie des DEG globalement, sans séparer lesquels sont dûs à quel effet.
Comme les contrastes ne sont pas donnés, on considère toutes les conditions comme 8 conditions indpendantes.
TCC fait le test globalement, avec un layout “one way”, qui prend les 8 conditions comme toutes différentes. En spécifiant un design de combinatoire \(2*2*2\), on peut avoir des coefficients testés contre 0 pour chacune des variables et leur interactions.
glmLRT conducts likelihood ratio tests for one or more coefficients in the linear model. If coef is used, the null hypothesis is that all the coefficients indicated by coef are equal to zero. If contrast is non-null, then the null hypothesis is that the specified contrasts of the coefficients are equal to zero. For example, a contrast of c(0,1,-1), assuming there are three coefficients, would test the hypothesis that the second and third coefficients are equal.
While the likelihood ratio test is a more obvious choice for inferences with GLMs, the QLF-test is preferred as it reflects the uncertainty in estimating the dispersion for each gene. Itprovides more robust and reliable error rate control when the number of replicates is small.The QL dispersion estimation and hypothesis testing can be done by using the functionsglmQLFit()andglmQLFTest()
# data
d <- data
# fixation correcte des contrastes et formule
# groups <- sapply(colnames(d), getLabel)
groups <- str_split_fixed(colnames(d), "_", 2)[, 1]
# colnames(d) <- sapply(colnames(d), getLabel)
y <- DGEList(counts = d, group = groups)
y$samples group lib.size norm.factors
cNF_3 cNF 32032772 1
cNF_2 cNF 25708325 1
cNF_1 cNF 25388926 1
cnF_2 cnF 29410725 1
cnF_1 cnF 26720600 1
cnF_3 cnF 26429519 1
CNF_1 CNF 16944203 1
CnF_2 CnF 28518287 1
CnF_1 CnF 30134658 1
CnF_3 CnF 30939161 1
cNf_1 cNf 30097179 1
cnf_2 cnf 29411029 1
cnf_1 cnf 32417299 1
cNf_2 cNf 34497190 1
cNf_3 cNf 31947050 1
cnf_3 cnf 30166724 1
Cnf_3 Cnf 33780095 1
CNf_1 CNf 30427501 1
Cnf_1 Cnf 29577130 1
Cnf_2 Cnf 30405383 1
CNf_3 CNf 25049240 1
CNf_2 CNf 28496520 1
CNF_3 CNF 18823627 1
CNF_2 CNF 20700711 1
# filtre
keep <- filterByExpr(y)
y <- y[keep, keep.lib.sizes = FALSE]
# normalisation
y <- calcNormFactors(y)
# norm <- cpm(y, normalized.lib.sizes=TRUE) not_norm <- cpm(y,
# normalized.lib.sizes=FALSE) rd_genes = sample(rownames(norm), 500)
# heatmap(not_norm[rd_genes,], main = 'Sans normalisation')
# heatmap(norm[rd_genes,], main = 'Avec normalisation')
# Definition du design et des bonnes conditions de référence
co2 <- str_split_fixed(groups, "", 3)[, 1]
nitrate <- factor(str_split_fixed(groups, "", 3)[, 2])
nitrate <- relevel(nitrate, "N")
fer <- factor(str_split_fixed(groups, "", 3)[, 3])
fer = relevel(fer, "F")
data.frame(Sample = groups, co2, nitrate, fer) Sample co2 nitrate fer
1 cNF c N F
2 cNF c N F
3 cNF c N F
4 cnF c n F
5 cnF c n F
6 cnF c n F
7 CNF C N F
8 CnF C n F
9 CnF C n F
10 CnF C n F
11 cNf c N f
12 cnf c n f
13 cnf c n f
14 cNf c N f
15 cNf c N f
16 cnf c n f
17 Cnf C n f
18 CNf C N f
[ reached 'max' / getOption("max.print") -- omitted 6 rows ]
| (Intercept) | co2C | nitraten | ferf | co2C:nitraten | co2C:ferf | nitraten:ferf | co2C:nitraten:ferf | |
|---|---|---|---|---|---|---|---|---|
| cNF_3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| cNF_2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| cNF_1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| cnF_2 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
| cnF_1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
| cnF_3 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
| CNF_1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| CnF_2 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | 0 |
| CnF_1 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | 0 |
| CnF_3 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | 0 |
| cNf_1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| cnf_2 | 1 | 0 | 1 | 1 | 0 | 0 | 1 | 0 |
| cnf_1 | 1 | 0 | 1 | 1 | 0 | 0 | 1 | 0 |
| cNf_2 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| cNf_3 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| cnf_3 | 1 | 0 | 1 | 1 | 0 | 0 | 1 | 0 |
| Cnf_3 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| CNf_1 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 |
| Cnf_1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cnf_2 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| CNf_3 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 |
| CNf_2 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 |
| CNF_3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| CNF_2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
# design <- model.matrix(~0 + groups) con <- makeContrasts((groupscnF -
# groupscnf), levels=design) fit <- glmQLFit(y, contrast = con) qlf.2vs1 <-
# glmQLFTest(fit) a = topTags(qlf.2vs1, n= 1000) a$table
# estimation of dispersion and tests
y <- edgeR::estimateDisp(y, design)
y$common.dispersion[1] 0.01113512
fit <- glmFit(y, design)
pval = 0.01
# Genewise Negative Binomial Generalized Linear Models
for (coef in colnames(design)) {
print(coef)
lrt <- glmLRT(fit, coef = coef)
print(summary(decideTests(lrt)), p.value = pval)
plotMD(lrt, cex = 0.25)
}[1] "(Intercept)"
(Intercept)
Down 21050
NotSig 0
Up 0
[1] "co2C"
co2C
Down 83
NotSig 20933
Up 34
[1] "nitraten"
nitraten
Down 2721
NotSig 16604
Up 1725
[1] "ferf"
ferf
Down 5306
NotSig 10429
Up 5315
[1] "co2C:nitraten"
co2C:nitraten
Down 190
NotSig 20465
Up 395
[1] "co2C:ferf"
co2C:ferf
Down 1205
NotSig 18321
Up 1524
[1] "nitraten:ferf"
nitraten:ferf
Down 2966
NotSig 14244
Up 3840
[1] "co2C:nitraten:ferf"
co2C:nitraten:ferf
Down 986
NotSig 18833
Up 1231
# pval = 0.01 #et <- exactTest(y) best = topTags(lrt, n = 10000) best =
# best[best$table$FDR < pval,] #best = best[abs(best$table$logFC) > 1,] #genes <-
# rownames(cpm(y, normalized.lib.sizes=TRUE)) DEgenes = rownames(best$table)
# plotBCV(y) abline(h=c(-1, 1), col='blue') abline(h=c(0), col='red')
# print(summary(decideTests(lrt, p.value = 0.05))) print(summary(decideTests(lrt,
# p.value = 0.01))) #plots plotMDS(y, top = 100)dualDE <- function(labels, pval = 0.01, method = "edgeR") {
d <- data[, grepl(labels[1], colnames(data)) | grepl(labels[2], colnames(data))]
if (method == "edgeR") {
# data
y <- DGEList(counts = d, group = str_split_fixed(colnames(d), "_", 2)[, 1])
y$samples
# filtre
keep <- filterByExpr(y)
y <- y[keep, , keep.lib.sizes = FALSE]
# normalisation
y <- calcNormFactors(y)
y$samples
norm <- cpm(y, normalized.lib.sizes = TRUE)
not_norm <- cpm(y, normalized.lib.sizes = FALSE)
rd_genes = sample(rownames(norm), 500)
heatmap(not_norm[rd_genes, ], main = "Sans normalisation")
heatmap(norm[rd_genes, ], main = "Avec normalisation")
# estimation of dispersion and tests
y <- edgeR::estimateDisp(y)
et <- exactTest(y)
best = topTags(et, n = 10000)
best = best[best$table$FDR < pval, ]
# best = best[abs(best$table$logFC) > 1,]
# genes <- rownames(cpm(y, normalized.lib.sizes=TRUE))
DEgenes = rownames(best$table)
plotBCV(y)
plotMD(et, p.value = pval)
abline(h = c(-1, 1), col = "blue")
abline(h = c(0), col = "red")
print(summary(decideTests(et, p.value = pval)))
# plots
plotMDS(y, top = 100, col = rep(1:2, each = 3))
logcpm <- cpm(y, log = TRUE)
heatmap(logcpm[DEgenes, ])
print("ON SELECTIONNE AU TOTAL :")
print(paste(length(DEgenes), "genes with fdr < ", pval))
results <- getBM(filters = "ensembl_gene_id", attributes = c("ensembl_gene_id",
"description", "external_gene_name", "entrezgene_id"), values = DEgenes,
mart = mart)
results <- results[!rownames(results) %in% which(duplicated(results$ensembl_gene_id)),
]
rownames(results) = results$ensembl_gene_id
r = results[DEgenes, ]
r$pval = best$table$PValue
r = r[order(r$pval), ]
return(r)
}
if (method == "DESeq") {
design = data.frame(row.names = colnames(d), condition = str_split_fixed(colnames(d),
"_", 2)[, 1], libType = rep("paired-end", length(colnames(d))))
dds <- DESeq2::DESeqDataSetFromMatrix(countData = d, colData = design, design = ~condition)
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep, ]
# dds$condition <- relevel(dds$condition, ref = 'untreated')
dds <- DESeq2::DESeq(dds)
res <- na.omit(results(dds))
resOrdered <- res[order(res$padj), ]
sum(res$padj < pval, na.rm = TRUE)
DESeq2::plotMA(resOrdered)
DEgenes <- rownames(res[res$padj < pval, ])
res <- res[res$padj < pval, ]
results <- getBM(filters = "ensembl_gene_id", attributes = c("ensembl_gene_id",
"description", "external_gene_name", "entrezgene_id"), values = DEgenes,
mart = mart)
results <- results[!rownames(results) %in% which(duplicated(results$ensembl_gene_id)),
]
rownames(results) = results$ensembl_gene_id
r <- results[DEgenes, ]
r$pval = res$padj
r = r[order(r$pval), ]
return(r)
}
}
OntologyProfile <- function(r) {
ego <- enrichGO(gene = r$entrezgene_id, OrgDb = org.At.tair.db, ont = "BP", pAdjustMethod = "BH",
pvalueCutoff = 0.01, qvalueCutoff = 0.05, readable = TRUE)
simpOnt <- simplify(ego, cutoff = 0.7, by = "p.adjust", select_fun = min)
simpOnt@result$Description
print(barplot(simpOnt, showCategory = 40, font.size = 5))
print(dotplot(simpOnt, showCategory = 40, font.size = 5))
print(emapplot(simpOnt, layout = "kk"))
}
mat <- matrix(nrow = length(unique(groups)), ncol = length(unique(groups)))
comparables = c()
for (c1 in unique(groups)) {
for (c2 in unique(groups)) {
nb_diffs = 0
for (i in seq(1, 3)) {
if (str_split(c1, "")[[1]][i] != str_split(c2, "")[[1]][i]) {
nb_diffs = nb_diffs + 1
}
}
if (nb_diffs == 1) {
comparables = c(comparables, paste0(c1, "-", c2))
}
}
}
getDEgenes <- function(labelsConc, pval = 0.01, method = "edgeR") {
# data
labels = c(str_split_fixed(labelsConc, "-", 2)[, 1], str_split_fixed(labelsConc,
"-", 2)[, 2])
print(labels)
d <- data[, grepl(labels[1], colnames(data)) | grepl(labels[2], colnames(data))]
if (method == "edgeR") {
y <- DGEList(counts = d, group = str_split_fixed(colnames(d), "_", 2)[, 1])
y$samples
# filtre
keep <- filterByExpr(y)
y <- y[keep, , keep.lib.sizes = FALSE]
# normalisation
y <- calcNormFactors(y)
y$samples
# estimation of dispersion and tests
y <- edgeR::estimateDisp(y)
et <- exactTest(y)
best = topTags(et, n = 20000)
best = best[best$table$FDR < pval, ]
# best = best[abs(best$table$logFC) > 1,]
DEgenes = rownames(best$table)
return(DEgenes)
}
if (method == "DESeq") {
design = data.frame(row.names = colnames(d), condition = str_split_fixed(colnames(d),
"_", 2)[, 1], libType = rep("paired-end", length(colnames(d))))
dds <- DESeq2::DESeqDataSetFromMatrix(countData = d, colData = design, design = ~condition)
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep, ]
# dds$condition <- relevel(dds$condition, ref = 'untreated')
dds <- DESeq2::DESeq(dds)
res <- na.omit(results(dds))
resOrdered <- res[order(res$padj), ]
sum(res$padj < pval, na.rm = TRUE)
DEgenes <- rownames(res[res$padj < pval, ])
return(DEgenes)
}
}
# getDEgenes <- function(labelsConc, pval = 0.01){ #data labels =
# c(str_split_fixed(labelsConc, '-',2)[,1], str_split_fixed(labelsConc,
# '-',2)[,2]) print( labels) d <- data[,grepl(labels[1], colnames(data)) |
# grepl(labels[2], colnames(data))] y <- DGEList(counts=d, group
# =str_split_fixed(colnames(d), '_', 2)[,1]) y$samples #filtre keep <-
# filterByExpr(y) y <- y[keep, , keep.lib.sizes=FALSE] #normalisation y <-
# calcNormFactors(y) y$samples # estimation of dispersion and tests y <-
# edgeR::estimateDisp(y) et <- exactTest(y) best = topTags(et, n = 20000) best =
# best[best$table$FDR < pval,] #best = best[abs(best$table$logFC) > 1,] #genes <-
# rownames(cpm(y, normalized.lib.sizes=TRUE)) DEgenes = rownames(best$table)
# return(DEgenes) }Design matrix not provided. Switch to the classic mode.
cNF-cnF
Down 758
NotSig 17187
Up 2054
[1] "ON SELECTIONNE AU TOTAL :"
[1] "2812 genes with fdr < 0.01"
| ensembl_gene_id | description | external_gene_name | entrezgene_id | pval | |
|---|---|---|---|---|---|
| AT3G45060 | AT3G45060 | High affinity nitrate transporter 2.6 [Source:UniProtKB/Swiss-Prot;Acc:Q9LXH0] | NRT2.6 | 823641 | 0 |
| ensembl_gene_id | description | external_gene_name | entrezgene_id | pval | |
|---|---|---|---|---|---|
| AT3G45060 | AT3G45060 | High affinity nitrate transporter 2.6 [Source:UniProtKB/Swiss-Prot;Acc:Q9LXH0] | NRT2.6 | 823641 | 0 |
| ensembl_gene_id | description | external_gene_name | entrezgene_id | pval | |
|---|---|---|---|---|---|
| AT3G45060 | AT3G45060 | High affinity nitrate transporter 2.6 [Source:UniProtKB/Swiss-Prot;Acc:Q9LXH0] | NRT2.6 | 823641 | 0.0000000 |
| AT2G14210 | AT2G14210 | AGAMOUS-like 44 [Source:TAIR;Acc:AT2G14210] | ANR1 | 815907 | 0.0000000 |
| AT1G80830 | AT1G80830 | Metal transporter Nramp1 [Source:UniProtKB/Swiss-Prot;Acc:Q9SAH8] | NRAMP1 | 844422 | 0.0000986 |
| AT2G04630 | AT2G04630 | DNA-directed RNA polymerases II and V subunit 6B [Source:UniProtKB/Swiss-Prot;Acc:Q9SJ96] | NRPB6B | 815006 | 0.0001547 |
[1] "all"
[1] "cnF" "cNF"
[1] "all"
[1] "cnF" "cNF"
[1] FALSE
NRT2.1 ne semble pas être différentiellement exprimé, NAR2 oui par contre.
On retrouve une majorité de gènes activés par le fort nitrate. Les résultats sont un peu similaires à ceux du jeu de données d’entrainement de Nature entre témoin au KCl et Traitement au NO3.
Design matrix not provided. Switch to the classic mode.
CNF-CnF
Down 512
NotSig 18162
Up 1259
[1] "ON SELECTIONNE AU TOTAL :"
[1] "1771 genes with fdr < 0.01"
| ensembl_gene_id | description | external_gene_name | entrezgene_id | pval | |
|---|---|---|---|---|---|
| AT3G45060 | AT3G45060 | High affinity nitrate transporter 2.6 [Source:UniProtKB/Swiss-Prot;Acc:Q9LXH0] | NRT2.6 | 823641 | 0 |
| AT5G50200 | AT5G50200 | High-affinity nitrate transporter 3.1 [Source:UniProtKB/Swiss-Prot;Acc:Q9FGS5] | NRT3.1 | 835085 | 0 |
[1] "CnF" "CNF"
[1] "CnF" "CNF"
Design matrix not provided. Switch to the classic mode.
CNF-cNF
Down 46
NotSig 19757
Up 18
[1] "ON SELECTIONNE AU TOTAL :"
[1] "64 genes with fdr < 0.01"
| ensembl_gene_id | description | external_gene_name | entrezgene_id | pval | |
|---|---|---|---|---|---|
| AT1G19250 | AT1G19250 | Probable flavin-containing monooxygenase 1 [Source:UniProtKB/Swiss-Prot;Acc:Q9LMA1] | FMO1 | 838508 | 0.00e+00 |
| AT5G64120 | AT5G64120 | Peroxidase [Source:UniProtKB/TrEMBL;Acc:A0A178UAR8] | PER71 | 836533 | 0.00e+00 |
| AT4G25100 | AT4G25100 | Superoxide dismutase [Fe] 1, chloroplastic [Source:UniProtKB/Swiss-Prot;Acc:P21276] | FSD1 | 828613 | 0.00e+00 |
| AT5G59520 | AT5G59520 | Zinc transporter 2 [Source:UniProtKB/Swiss-Prot;Acc:Q9LTH9] | ZIP2 | 836071 | 0.00e+00 |
| AT1G67270 | AT1G67270 | Zinc-finger domain of monoamine-oxidase A repressor R1 protein [Source:TAIR;Acc:AT1G67270] | 843047 | 0.00e+00 | |
| AT2G01520 | AT2G01520 | MLP-like protein 328 [Source:UniProtKB/Swiss-Prot;Acc:Q9ZVF3] | MLP328 | 814681 | 0.00e+00 |
| AT5G23980 | AT5G23980 | Ferric reduction oxidase 4 [Source:UniProtKB/Swiss-Prot;Acc:Q8W110] | FRO4 | 832463 | 0.00e+00 |
| AT4G37060 | AT4G37060 | PATATIN-like protein 5 [Source:TAIR;Acc:AT4G37060] | PLP5 | 829860 | 0.00e+00 |
| AT4G33560 | AT4G33560 | At4g33560 [Source:UniProtKB/TrEMBL;Acc:O81873] | 829495 | 0.00e+00 | |
| AT2G35980 | AT2G35980 | NDR1/HIN1-like protein 10 [Source:UniProtKB/Swiss-Prot;Acc:Q9SJ52] | NHL10 | 818171 | 0.00e+00 |
| AT1G73120 | AT1G73120 | At1g73120 [Source:UniProtKB/TrEMBL;Acc:Q9CAS9] | 843643 | 0.00e+00 | |
| AT5G23990 | AT5G23990 | ferric reduction oxidase 5 [Source:TAIR;Acc:AT5G23990] | ATFRO5 | 832464 | 0.00e+00 |
| AT3G12900 | AT3G12900 | 2-oxoglutarate (2OG) and Fe(II)-dependent oxygenase superfamily protein [Source:UniProtKB/TrEMBL;Acc:Q9LE86] | 820473 | 0.00e+00 | |
| AT5G44110 | AT5G44110 | ABC transporter I family member 21 [Source:UniProtKB/Swiss-Prot;Acc:Q9XF19] | ABCI21 | 834434 | 0.00e+00 |
| AT3G56290 | AT3G56290 | Potassium transporter [Source:UniProtKB/TrEMBL;Acc:Q9LYL4] | 824796 | 1.00e-07 | |
| AT1G62510 | AT1G62510 | Bifunctional inhibitor/lipid-transfer protein/seed storage 2S albumin superfamily protein [Source:UniProtKB/TrEMBL;Acc:Q9SXE6] | 842548 | 1.00e-07 | |
| AT4G02330 | AT4G02330 | Probable pectinesterase/pectinesterase inhibitor 41 [Source:UniProtKB/Swiss-Prot;Acc:Q8RXK7] | PME41 | 828064 | 1.00e-07 |
| AT1G05880 | AT1G05880 | Probable E3 ubiquitin-protein ligase ARI12 [Source:UniProtKB/Swiss-Prot;Acc:Q84RQ9] | ARI12 | 837098 | 2.00e-07 |
| AT1G55790 | AT1G55790 | Domain of unknown function (DUF2431) [Source:TAIR;Acc:AT1G55790] | NA | 2.00e-07 | |
| AT4G25480 | AT4G25480 | Dehydration-responsive element-binding protein 1A [Source:UniProtKB/Swiss-Prot;Acc:Q9M0L0] | DREB1A | 828652 | 3.00e-07 |
| AT2G39040 | AT2G39040 | Peroxidase 24 [Source:UniProtKB/Swiss-Prot;Acc:Q9ZV04] | PER24 | 818490 | 3.00e-07 |
| AT4G31940 | AT4G31940 | Cytochrome P450 82C4 [Source:UniProtKB/Swiss-Prot;Acc:Q9SZ46] | CYP82C4 | 829324 | 3.00e-07 |
| AT1G44030 | AT1G44030 | Cysteine/Histidine-rich C1 domain family protein [Source:TAIR;Acc:AT1G44030] | 841003 | 3.00e-07 | |
| AT5G39580 | AT5G39580 | Peroxidase 62 [Source:UniProtKB/Swiss-Prot;Acc:Q9FKA4] | PER62 | 833954 | 4.00e-07 |
| AT4G11170 | AT4G11170 | Putative disease resistance protein At4g11170 [Source:UniProtKB/Swiss-Prot;Acc:O82500] | 826718 | 4.00e-07 | |
| AT4G33980 | AT4G33980 | BEST Arabidopsis thaliana protein match is: cold regulated gene 27 (TAIR:AT5G42900.2); Ha. [Source:TAIR;Acc:AT4G33980] | 829544 | 7.00e-07 | |
| AT5G39890 | AT5G39890 | Plant cysteine oxidase 2 [Source:UniProtKB/Swiss-Prot;Acc:Q8LGJ5] | PCO2 | 833986 | 9.00e-07 |
| AT4G33720 | AT4G33720 | AT4g33720/T16L1_210 [Source:UniProtKB/TrEMBL;Acc:O81888] | 829514 | 1.00e-06 | |
| AT1G51900 | AT1G51900 | Regulator of Vps4 activity in the MVB pathway protein [Source:UniProtKB/TrEMBL;Acc:F4IB75] | 841617 | 1.10e-06 | |
| AT1G01680 | AT1G01680 | U-box domain-containing protein 54 [Source:UniProtKB/Swiss-Prot;Acc:Q9LQ92] | PUB54 | 839251 | 1.20e-06 |
| AT3G03270 | AT3G03270 | AT3G03270 protein [Source:UniProtKB/TrEMBL;Acc:Q8LFK2] | 821303 | 1.30e-06 | |
| AT2G02950 | AT2G02950 | Protein PHYTOCHROME KINASE SUBSTRATE 1 [Source:UniProtKB/Swiss-Prot;Acc:Q9SWI1] | PKS1 | 814823 | 1.40e-06 |
| AT1G43800 | AT1G43800 | Stearoyl-[acyl-carrier-protein] 9-desaturase 6, chloroplastic [Source:UniProtKB/Swiss-Prot;Acc:Q84VY3] | S-ACP-DES6 | 840977 | 1.50e-06 |
| AT1G01560 | AT1G01560 | Mitogen-activated protein kinase 11 [Source:UniProtKB/Swiss-Prot;Acc:Q9LMM5] | MPK11 | 839523 | 1.70e-06 |
| AT2G45760 | AT2G45760 | BON1-associated protein 2 [Source:UniProtKB/Swiss-Prot;Acc:Q58FX0] | BAP2 | 819184 | 1.80e-06 |
| AT1G01060 | AT1G01060 | LHY1 [Source:UniProtKB/TrEMBL;Acc:A0A178W761] | LHY | 839341 | 2.40e-06 |
| AT5G44190 | AT5G44190 | Transcription activator GLK2 [Source:UniProtKB/Swiss-Prot;Acc:Q9FFH0] | GLK2 | 834442 | 3.20e-06 |
| AT5G56080 | AT5G56080 | NAS2 [Source:UniProtKB/TrEMBL;Acc:A0A178ULG9] | NAS2 | 835707 | 3.70e-06 |
| AT1G12805 | AT1G12805 | Nucleotide binding protein [Source:UniProtKB/TrEMBL;Acc:Q3EDD5] | 2745749 | 3.90e-06 | |
| AT3G27650 | AT3G27650 | LOB domain-containing protein 25 [Source:UniProtKB/Swiss-Prot;Acc:Q8L8Q3] | LBD25 | 822387 | 4.20e-06 |
| AT3G53150 | AT3G53150 | UDP-glucosyl transferase 73D1 [Source:TAIR;Acc:AT3G53150] | UGT73D1 | NA | 4.50e-06 |
| AT1G18970 | AT1G18970 | Germin-like protein subfamily T member 1 [Source:UniProtKB/Swiss-Prot;Acc:P92995] | GLP1 | 838478 | 4.60e-06 |
| AT4G14368 | AT4G14368 | Regulator of chromosome condensation (RCC1) family protein [Source:UniProtKB/TrEMBL;Acc:F4JVE8] | 7922506 | 5.40e-06 | |
| AT1G76930 | AT1G76930 | extensin 4 [Source:TAIR;Acc:AT1G76930] | ATEXT4 | NA | 5.40e-06 |
| AT4G38420 | AT4G38420 | Putative pectinesterase [Source:UniProtKB/TrEMBL;Acc:Q8VYB3] | sks9 | 829999 | 5.80e-06 |
| AT1G14550 | AT1G14550 | Peroxidase 5 [Source:UniProtKB/Swiss-Prot;Acc:Q9M9Q9] | PER5 | 838017 | 6.40e-06 |
| AT1G64170 | AT1G64170 | Cation/H(+) antiporter 16 [Source:UniProtKB/Swiss-Prot;Acc:Q1HDT3] | CHX16 | 842721 | 7.00e-06 |
| AT5G02780 | AT5G02780 | Glutathione S-transferase L1 [Source:UniProtKB/Swiss-Prot;Acc:Q6NLB0] | GSTL1 | 831800 | 7.60e-06 |
| AT3G07650 | AT3G07650 | COL9 [Source:UniProtKB/TrEMBL;Acc:A0A384KVH3] | COL9 | 819956 | 7.90e-06 |
| AT1G61480 | AT1G61480 | G-type lectin S-receptor-like serine/threonine-protein kinase At1g61480 [Source:UniProtKB/Swiss-Prot;Acc:O64771] | 842442 | 8.40e-06 | |
| AT3G08970 | AT3G08970 | TMS1 [Source:UniProtKB/TrEMBL;Acc:A0A178VJB6] | ERDJ3A | 820049 | 1.03e-05 |
| AT5G60100 | AT5G60100 | Pseudo-response regulator 3 [Source:UniProtKB/TrEMBL;Acc:F4JXG7] | PRR3 | 836132 | 1.04e-05 |
| AT1G77530 | AT1G77530 | O-methyltransferase family protein [Source:UniProtKB/TrEMBL;Acc:Q9CAQ3] | 844089 | 1.10e-05 | |
| AT1G21240 | AT1G21240 | Wall-associated receptor kinase 3 [Source:UniProtKB/Swiss-Prot;Acc:Q9LMN8] | WAK3 | 838719 | 1.15e-05 |
| AT3G63380 | AT3G63380 | Calcium-transporting ATPase [Source:UniProtKB/TrEMBL;Acc:A0A178VEV7] | ACA12 | 825513 | 1.33e-05 |
| AT5G26920 | AT5G26920 | Calmodulin-binding protein 60 G [Source:UniProtKB/Swiss-Prot;Acc:F4K2R6] | CBP60G | 832750 | 1.51e-05 |
| AT4G12550 | AT4G12550 | Putative lipid-binding protein AIR1 [Source:UniProtKB/Swiss-Prot;Acc:Q9S7I2] | AIR1 | 826868 | 1.53e-05 |
| AT4G21380 | AT4G21380 | Receptor-like serine/threonine-protein kinase SD1-8 [Source:UniProtKB/Swiss-Prot;Acc:O81905] | SD18 | 827890 | 1.58e-05 |
| AT3G08040 | AT3G08040 | Protein DETOXIFICATION 43 [Source:UniProtKB/Swiss-Prot;Acc:Q9SFB0] | DTX43 | 819995 | 1.69e-05 |
| AT4G10500 | AT4G10500 | Protein DMR6-LIKE OXYGENASE 1 [Source:UniProtKB/Swiss-Prot;Acc:Q9ZSA8] | DLO1 | 826642 | 1.69e-05 |
| AT3G47720 | AT3G47720 | Probable inactive poly [ADP-ribose] polymerase SRO4 [Source:UniProtKB/Swiss-Prot;Acc:Q9STU1] | SRO4 | 823926 | 1.80e-05 |
| AT4G10510 | AT4G10510 | Subtilisin-like protease SBT3.7 [Source:UniProtKB/Swiss-Prot;Acc:Q9SZY2] | SBT3.7 | 826643 | 2.19e-05 |
| AT5G15120 | AT5G15120 | Plant cysteine oxidase 1 [Source:UniProtKB/Swiss-Prot;Acc:Q9LXG9] | PCO1 | 831364 | 2.30e-05 |
| AT2G33790 | AT2G33790 | Non-classical arabinogalactan protein 30 [Source:UniProtKB/Swiss-Prot;Acc:P93013] | AGP30 | 817946 | 3.16e-05 |
| ensembl_gene_id | description | external_gene_name | entrezgene_id | pval | |
|---|---|---|---|---|---|
| AT5G64120 | AT5G64120 | Peroxidase [Source:UniProtKB/TrEMBL;Acc:A0A178UAR8] | PER71 | 836533 | 0.0e+00 |
| AT4G25100 | AT4G25100 | Superoxide dismutase [Fe] 1, chloroplastic [Source:UniProtKB/Swiss-Prot;Acc:P21276] | FSD1 | 828613 | 0.0e+00 |
| AT1G67270 | AT1G67270 | Zinc-finger domain of monoamine-oxidase A repressor R1 protein [Source:TAIR;Acc:AT1G67270] | 843047 | 0.0e+00 | |
| AT5G23980 | AT5G23980 | Ferric reduction oxidase 4 [Source:UniProtKB/Swiss-Prot;Acc:Q8W110] | FRO4 | 832463 | 0.0e+00 |
| AT5G23990 | AT5G23990 | ferric reduction oxidase 5 [Source:TAIR;Acc:AT5G23990] | ATFRO5 | 832464 | 0.0e+00 |
| AT2G39040 | AT2G39040 | Peroxidase 24 [Source:UniProtKB/Swiss-Prot;Acc:Q9ZV04] | PER24 | 818490 | 3.0e-07 |
| AT5G39580 | AT5G39580 | Peroxidase 62 [Source:UniProtKB/Swiss-Prot;Acc:Q9FKA4] | PER62 | 833954 | 4.0e-07 |
| AT5G39890 | AT5G39890 | Plant cysteine oxidase 2 [Source:UniProtKB/Swiss-Prot;Acc:Q8LGJ5] | PCO2 | 833986 | 9.0e-07 |
| AT1G14550 | AT1G14550 | Peroxidase 5 [Source:UniProtKB/Swiss-Prot;Acc:Q9M9Q9] | PER5 | 838017 | 6.4e-06 |
| AT5G15120 | AT5G15120 | Plant cysteine oxidase 1 [Source:UniProtKB/Swiss-Prot;Acc:Q9LXG9] | PCO1 | 831364 | 2.3e-05 |
| ensembl_gene_id | description | external_gene_name | entrezgene_id | pval | |
|---|---|---|---|---|---|
| AT1G19250 | AT1G19250 | Probable flavin-containing monooxygenase 1 [Source:UniProtKB/Swiss-Prot;Acc:Q9LMA1] | FMO1 | 838508 | 0.0000000 |
| AT4G25100 | AT4G25100 | Superoxide dismutase [Fe] 1, chloroplastic [Source:UniProtKB/Swiss-Prot;Acc:P21276] | FSD1 | 828613 | 0.0000000 |
| AT5G64120 | AT5G64120 | Peroxidase [Source:UniProtKB/TrEMBL;Acc:A0A178UAR8] | PER71 | 836533 | 0.0000000 |
| AT2G01520 | AT2G01520 | MLP-like protein 328 [Source:UniProtKB/Swiss-Prot;Acc:Q9ZVF3] | MLP328 | 814681 | 0.0000002 |
| AT5G59520 | AT5G59520 | Zinc transporter 2 [Source:UniProtKB/Swiss-Prot;Acc:Q9LTH9] | ZIP2 | 836071 | 0.0000002 |
| AT1G73120 | AT1G73120 | At1g73120 [Source:UniProtKB/TrEMBL;Acc:Q9CAS9] | 843643 | 0.0000056 | |
| AT5G23980 | AT5G23980 | Ferric reduction oxidase 4 [Source:UniProtKB/Swiss-Prot;Acc:Q8W110] | FRO4 | 832463 | 0.0000056 |
| AT4G33560 | AT4G33560 | At4g33560 [Source:UniProtKB/TrEMBL;Acc:O81873] | 829495 | 0.0000065 | |
| AT1G67270 | AT1G67270 | Zinc-finger domain of monoamine-oxidase A repressor R1 protein [Source:TAIR;Acc:AT1G67270] | 843047 | 0.0000076 | |
| AT4G37060 | AT4G37060 | PATATIN-like protein 5 [Source:TAIR;Acc:AT4G37060] | PLP5 | 829860 | 0.0000080 |
| AT2G35980 | AT2G35980 | NDR1/HIN1-like protein 10 [Source:UniProtKB/Swiss-Prot;Acc:Q9SJ52] | NHL10 | 818171 | 0.0000481 |
| AT5G44110 | AT5G44110 | ABC transporter I family member 21 [Source:UniProtKB/Swiss-Prot;Acc:Q9XF19] | ABCI21 | 834434 | 0.0000901 |
| AT4G33720 | AT4G33720 | AT4g33720/T16L1_210 [Source:UniProtKB/TrEMBL;Acc:O81888] | 829514 | 0.0002684 | |
| AT1G62510 | AT1G62510 | Bifunctional inhibitor/lipid-transfer protein/seed storage 2S albumin superfamily protein [Source:UniProtKB/TrEMBL;Acc:Q9SXE6] | 842548 | 0.0004274 | |
| AT4G02330 | AT4G02330 | Probable pectinesterase/pectinesterase inhibitor 41 [Source:UniProtKB/Swiss-Prot;Acc:Q8RXK7] | PME41 | 828064 | 0.0005787 |
| AT1G43800 | AT1G43800 | Stearoyl-[acyl-carrier-protein] 9-desaturase 6, chloroplastic [Source:UniProtKB/Swiss-Prot;Acc:Q84VY3] | S-ACP-DES6 | 840977 | 0.0006119 |
| AT1G05880 | AT1G05880 | Probable E3 ubiquitin-protein ligase ARI12 [Source:UniProtKB/Swiss-Prot;Acc:Q84RQ9] | ARI12 | 837098 | 0.0006745 |
| AT2G39040 | AT2G39040 | Peroxidase 24 [Source:UniProtKB/Swiss-Prot;Acc:Q9ZV04] | PER24 | 818490 | 0.0006745 |
| AT1G18970 | AT1G18970 | Germin-like protein subfamily T member 1 [Source:UniProtKB/Swiss-Prot;Acc:P92995] | GLP1 | 838478 | 0.0006858 |
| AT3G56290 | AT3G56290 | Potassium transporter [Source:UniProtKB/TrEMBL;Acc:Q9LYL4] | 824796 | 0.0008638 | |
| AT5G23990 | AT5G23990 | ferric reduction oxidase 5 [Source:TAIR;Acc:AT5G23990] | ATFRO5 | 832464 | 0.0008638 |
| AT4G11170 | AT4G11170 | Putative disease resistance protein At4g11170 [Source:UniProtKB/Swiss-Prot;Acc:O82500] | 826718 | 0.0009132 | |
| AT4G25480 | AT4G25480 | Dehydration-responsive element-binding protein 1A [Source:UniProtKB/Swiss-Prot;Acc:Q9M0L0] | DREB1A | 828652 | 0.0009935 |
| AT5G56080 | AT5G56080 | NAS2 [Source:UniProtKB/TrEMBL;Acc:A0A178ULG9] | NAS2 | 835707 | 0.0009935 |
| AT3G03270 | AT3G03270 | AT3G03270 protein [Source:UniProtKB/TrEMBL;Acc:Q8LFK2] | 821303 | 0.0011646 | |
| AT5G39580 | AT5G39580 | Peroxidase 62 [Source:UniProtKB/Swiss-Prot;Acc:Q9FKA4] | PER62 | 833954 | 0.0011646 |
| AT1G76930 | AT1G76930 | extensin 4 [Source:TAIR;Acc:AT1G76930] | ATEXT4 | NA | 0.0013194 |
| AT5G39890 | AT5G39890 | Plant cysteine oxidase 2 [Source:UniProtKB/Swiss-Prot;Acc:Q8LGJ5] | PCO2 | 833986 | 0.0014364 |
| AT1G44030 | AT1G44030 | Cysteine/Histidine-rich C1 domain family protein [Source:TAIR;Acc:AT1G44030] | 841003 | 0.0014760 | |
| AT4G33980 | AT4G33980 | BEST Arabidopsis thaliana protein match is: cold regulated gene 27 (TAIR:AT5G42900.2); Ha. [Source:TAIR;Acc:AT4G33980] | 829544 | 0.0014760 | |
| AT1G14550 | AT1G14550 | Peroxidase 5 [Source:UniProtKB/Swiss-Prot;Acc:Q9M9Q9] | PER5 | 838017 | 0.0015432 |
| AT1G55790 | AT1G55790 | Domain of unknown function (DUF2431) [Source:TAIR;Acc:AT1G55790] | NA | 0.0021458 | |
| AT2G02950 | AT2G02950 | Protein PHYTOCHROME KINASE SUBSTRATE 1 [Source:UniProtKB/Swiss-Prot;Acc:Q9SWI1] | PKS1 | 814823 | 0.0021779 |
| AT1G77530 | AT1G77530 | O-methyltransferase family protein [Source:UniProtKB/TrEMBL;Acc:Q9CAQ3] | 844089 | 0.0025354 | |
| AT1G12805 | AT1G12805 | Nucleotide binding protein [Source:UniProtKB/TrEMBL;Acc:Q3EDD5] | 2745749 | 0.0030373 | |
| AT5G44190 | AT5G44190 | Transcription activator GLK2 [Source:UniProtKB/Swiss-Prot;Acc:Q9FFH0] | GLK2 | 834442 | 0.0037070 |
| AT1G01680 | AT1G01680 | U-box domain-containing protein 54 [Source:UniProtKB/Swiss-Prot;Acc:Q9LQ92] | PUB54 | 839251 | 0.0037257 |
| AT4G12550 | AT4G12550 | Putative lipid-binding protein AIR1 [Source:UniProtKB/Swiss-Prot;Acc:Q9S7I2] | AIR1 | 826868 | 0.0039020 |
| AT3G27650 | AT3G27650 | LOB domain-containing protein 25 [Source:UniProtKB/Swiss-Prot;Acc:Q8L8Q3] | LBD25 | 822387 | 0.0043637 |
| AT5G02780 | AT5G02780 | Glutathione S-transferase L1 [Source:UniProtKB/Swiss-Prot;Acc:Q6NLB0] | GSTL1 | 831800 | 0.0046956 |
| AT1G06040 | AT1G06040 | B-box zinc finger protein 24 [Source:UniProtKB/Swiss-Prot;Acc:Q96288] | BBX24 | 837113 | 0.0062472 |
| AT1G64170 | AT1G64170 | Cation/H(+) antiporter 16 [Source:UniProtKB/Swiss-Prot;Acc:Q1HDT3] | CHX16 | 842721 | 0.0062472 |
| AT2G06850 | AT2G06850 | Xyloglucan endotransglucosylase/hydrolase protein 4 [Source:UniProtKB/Swiss-Prot;Acc:Q39099] | XTH4 | 815247 | 0.0066845 |
| AT3G53150 | AT3G53150 | UDP-glucosyl transferase 73D1 [Source:TAIR;Acc:AT3G53150] | UGT73D1 | NA | 0.0069327 |
| AT2G45760 | AT2G45760 | BON1-associated protein 2 [Source:UniProtKB/Swiss-Prot;Acc:Q58FX0] | BAP2 | 819184 | 0.0070184 |
| AT1G01060 | AT1G01060 | LHY1 [Source:UniProtKB/TrEMBL;Acc:A0A178W761] | LHY | 839341 | 0.0081375 |
| AT2G33790 | AT2G33790 | Non-classical arabinogalactan protein 30 [Source:UniProtKB/Swiss-Prot;Acc:P93013] | AGP30 | 817946 | 0.0086581 |
| AT1G12090 | AT1G12090 | ELP [Source:UniProtKB/TrEMBL;Acc:A0A178W1A4] | ELP | 837761 | 0.0089233 |
| AT3G08040 | AT3G08040 | Protein DETOXIFICATION 43 [Source:UniProtKB/Swiss-Prot;Acc:Q9SFB0] | DTX43 | 819995 | 0.0089233 |
| AT1G61480 | AT1G61480 | G-type lectin S-receptor-like serine/threonine-protein kinase At1g61480 [Source:UniProtKB/Swiss-Prot;Acc:O64771] | 842442 | 0.0091793 |
| ensembl_gene_id | description | external_gene_name | entrezgene_id | pval | |
|---|---|---|---|---|---|
| AT4G25100 | AT4G25100 | Superoxide dismutase [Fe] 1, chloroplastic [Source:UniProtKB/Swiss-Prot;Acc:P21276] | FSD1 | 828613 | 0.0000000 |
| AT5G64120 | AT5G64120 | Peroxidase [Source:UniProtKB/TrEMBL;Acc:A0A178UAR8] | PER71 | 836533 | 0.0000000 |
| AT5G23980 | AT5G23980 | Ferric reduction oxidase 4 [Source:UniProtKB/Swiss-Prot;Acc:Q8W110] | FRO4 | 832463 | 0.0000056 |
| AT1G67270 | AT1G67270 | Zinc-finger domain of monoamine-oxidase A repressor R1 protein [Source:TAIR;Acc:AT1G67270] | 843047 | 0.0000076 | |
| AT2G39040 | AT2G39040 | Peroxidase 24 [Source:UniProtKB/Swiss-Prot;Acc:Q9ZV04] | PER24 | 818490 | 0.0006745 |
| AT5G23990 | AT5G23990 | ferric reduction oxidase 5 [Source:TAIR;Acc:AT5G23990] | ATFRO5 | 832464 | 0.0008638 |
| AT5G39580 | AT5G39580 | Peroxidase 62 [Source:UniProtKB/Swiss-Prot;Acc:Q9FKA4] | PER62 | 833954 | 0.0011646 |
| AT5G39890 | AT5G39890 | Plant cysteine oxidase 2 [Source:UniProtKB/Swiss-Prot;Acc:Q8LGJ5] | PCO2 | 833986 | 0.0014364 |
| AT1G14550 | AT1G14550 | Peroxidase 5 [Source:UniProtKB/Swiss-Prot;Acc:Q9M9Q9] | PER5 | 838017 | 0.0015432 |
Design matrix not provided. Switch to the classic mode.
CnF-cnF
Down 666
NotSig 18680
Up 746
[1] "ON SELECTIONNE AU TOTAL :"
[1] "1412 genes with fdr < 0.01"
Design matrix not provided. Switch to the classic mode.
CNf-cNf
Down 1040
NotSig 18086
Up 1232
[1] "ON SELECTIONNE AU TOTAL :"
[1] "2272 genes with fdr < 0.01"
Design matrix not provided. Switch to the classic mode.
Cnf-cnf
Down 603
NotSig 18788
Up 1031
[1] "ON SELECTIONNE AU TOTAL :"
[1] "1634 genes with fdr < 0.01"
Design matrix not provided. Switch to the classic mode.
cNF-cNf
Down 3497
NotSig 12986
Up 3923
[1] "ON SELECTIONNE AU TOTAL :"
[1] "7420 genes with fdr < 0.01"
Design matrix not provided. Switch to the classic mode.
CNf-cNf
Down 1040
NotSig 18086
Up 1232
[1] "ON SELECTIONNE AU TOTAL :"
[1] "2272 genes with fdr < 0.01"
Combinatoire 222 : Fer Nitrate CO2 pour Tomate et Arabidopsis : 48 transcritpômes
Données brutes traitées ave un pipeline fastp + STAR (stringenace 2 mismatch) + htseq-count pour obtenir des matrices de comptage (scripts python et bash sur cluster)
Qualité très statisfaisante, seul 3 échantillons de la condition CNF, ont une couverture un peu infériere aux autres, mais toujours très correcte
Analyse d’homogénéité entre les réplicats biologiques de chacune des 8 conditions (html avec correlation histograms), on conclu a des bons réplicats biologiques
Analyse DE globale edgeR entre toutes les conditions, desig combinatoire encore à affiner et travailler puis clustering Coseq et ACP : très fort effet du Fer (second axe principal), effet tangible du nitrate (Troisième axe principal), CO2 peu décernable (on ne voit rien sur le quatrième axe principal)
On a 3*4 comparaisons de possibles entre nos transcriptômes. Pour chaque facteur, on le compare dans les 4 combinaisons possibles des 2 autres facteurs.
DEgenes = list()
nbDe = c()
for (comp in comparables) {
print(comp)
genes = getDEgenes(comp)
DEgenes[[length(DEgenes) + 1]] = genes
nbDe = c(nbDe, length(genes))
}[1] "cNF-cnF"
[1] "cNF" "cnF"
Design matrix not provided. Switch to the classic mode.
[1] "cNF-CNF"
[1] "cNF" "CNF"
Design matrix not provided. Switch to the classic mode.
[1] "cNF-cNf"
[1] "cNF" "cNf"
Design matrix not provided. Switch to the classic mode.
[1] "cnF-cNF"
[1] "cnF" "cNF"
Design matrix not provided. Switch to the classic mode.
[1] "cnF-CnF"
[1] "cnF" "CnF"
Design matrix not provided. Switch to the classic mode.
[1] "cnF-cnf"
[1] "cnF" "cnf"
Design matrix not provided. Switch to the classic mode.
[1] "CNF-cNF"
[1] "CNF" "cNF"
Design matrix not provided. Switch to the classic mode.
[1] "CNF-CnF"
[1] "CNF" "CnF"
Design matrix not provided. Switch to the classic mode.
[1] "CNF-CNf"
[1] "CNF" "CNf"
Design matrix not provided. Switch to the classic mode.
[1] "CnF-cnF"
[1] "CnF" "cnF"
Design matrix not provided. Switch to the classic mode.
[1] "CnF-CNF"
[1] "CnF" "CNF"
Design matrix not provided. Switch to the classic mode.
[1] "CnF-Cnf"
[1] "CnF" "Cnf"
Design matrix not provided. Switch to the classic mode.
[1] "cNf-cNF"
[1] "cNf" "cNF"
Design matrix not provided. Switch to the classic mode.
[1] "cNf-cnf"
[1] "cNf" "cnf"
Design matrix not provided. Switch to the classic mode.
[1] "cNf-CNf"
[1] "cNf" "CNf"
Design matrix not provided. Switch to the classic mode.
[1] "cnf-cnF"
[1] "cnf" "cnF"
Design matrix not provided. Switch to the classic mode.
[1] "cnf-cNf"
[1] "cnf" "cNf"
Design matrix not provided. Switch to the classic mode.
[1] "cnf-Cnf"
[1] "cnf" "Cnf"
Design matrix not provided. Switch to the classic mode.
[1] "Cnf-CnF"
[1] "Cnf" "CnF"
Design matrix not provided. Switch to the classic mode.
[1] "Cnf-cnf"
[1] "Cnf" "cnf"
Design matrix not provided. Switch to the classic mode.
[1] "Cnf-CNf"
[1] "Cnf" "CNf"
Design matrix not provided. Switch to the classic mode.
[1] "CNf-CNF"
[1] "CNf" "CNF"
Design matrix not provided. Switch to the classic mode.
[1] "CNf-cNf"
[1] "CNf" "cNf"
Design matrix not provided. Switch to the classic mode.
[1] "CNf-Cnf"
[1] "CNf" "Cnf"
Design matrix not provided. Switch to the classic mode.
save(file = "DEgenes.Rdata", DEgenes)
save(file = "Comparisons.RData", comparables)
# nbDe <- sapply(comparables, getGEgenesNumber)
df <- data.frame(Comparisons = comparables, DEGNumber = nbDe)
df <- df[rownames(df)[which(!duplicated(df$DEGNumber))], ]
kable(df[order(-df$DEGNumber), ])| Comparisons | DEGNumber | |
|---|---|---|
| 6 | cnF-cnf | 10268 |
| 12 | CnF-Cnf | 9602 |
| 9 | CNF-CNf | 7425 |
| 3 | cNF-cNf | 7420 |
| 14 | cNf-cnf | 5365 |
| 1 | cNF-cnF | 2812 |
| 21 | Cnf-CNf | 2735 |
| 15 | cNf-CNf | 2272 |
| 8 | CNF-CnF | 1771 |
| 18 | cnf-Cnf | 1634 |
| 5 | cnF-CnF | 1412 |
| 2 | cNF-CNF | 64 |
library(reshape2)
p <- ggplot(data = df, aes(x = Comparisons, y = DEGNumber, fill = Comparisons)) +
geom_dotplot(binaxis = "y", stackdir = "center") + theme(axis.text.x = element_text(angle = 320,
hjust = 0, colour = "grey50"), plot.title = element_text(size = 14, face = "bold")) +
ggtitle(paste("Number of DE genes for every condition comparisons"))
p # Interprêtations
Le fer semble le facteur le plus impactant, suivi du nitrate. Les début d’analyses d’ontologies sont plutôt concluants.
Le fait que l’effet CO2 en conditions normale soit si faible est marquant, et il semble accentué en condition de stress nutritionnel. Comme cet échantillon a été traité comme les autres tout au cours du pipeline, on ne voit pas d’autre explication à ce stade.
Il reste à comparer les gènes trouvés en conditions normales et altérées des autres facteurs, pour voir si on retrouve bien les mêmes.
Voir si des gènes bien connus pour répondre à un facteur sont retrouvés dans la liste d’expression différentielle.
Refaire pareil avec la tomate, et trouver des gènes orthologues.
Reste à faire une meilleure analyse globale en fixant bien les contrastes et design de l’estimation de dispersion et tests.
library(ggVennDiagram)
library(VennDiagram)
names(DEgenes) <- comparables
getOneFactorComparisons <- function(factor = "C") {
if (factor == "C")
pos = 1 else if (factor == "n")
pos = 2 else if (factor == "f")
pos = 3
poss = seq(1:3)
others = poss[poss != pos]
res = c()
for (comp in comparables) {
labels = str_split_fixed(comp, "-", 2)
diffFactor = substr(labels[1], pos, pos) != substr(labels[2], pos, pos)
other1 = substr(labels[1], others[1], others[1]) == substr(labels[2], others[1],
others[1])
other2 = substr(labels[1], others[2], others[2]) == substr(labels[2], others[2],
others[2])
if (diffFactor & other1 & other2 & grepl(factor, labels[1])) {
res = c(res, comp)
}
}
return(res)
}
DEgenesCO2 = list()
for (comp in getOneFactorComparisons()) {
print(comp)
genes = getDEgenes(comp)
DEgenesCO2[[length(DEgenesCO2) + 1]] = genes
}[1] "CNF-cNF"
[1] "CNF" "cNF"
Design matrix not provided. Switch to the classic mode.
[1] "CnF-cnF"
[1] "CnF" "cnF"
Design matrix not provided. Switch to the classic mode.
[1] "Cnf-cnf"
[1] "Cnf" "cnf"
Design matrix not provided. Switch to the classic mode.
[1] "CNf-cNf"
[1] "CNf" "cNf"
Design matrix not provided. Switch to the classic mode.
names(DEgenesCO2) = getOneFactorComparisons()
ggVennDiagram(DEgenesCO2, main = "Differentially expressed genes in reponse to CO2")DEgenesN = list()
for (comp in getOneFactorComparisons("n")) {
print(comp)
genes = getDEgenes(comp)
DEgenesN[[length(DEgenesN) + 1]] = genes
}[1] "cnF-cNF"
[1] "cnF" "cNF"
Design matrix not provided. Switch to the classic mode.
[1] "CnF-CNF"
[1] "CnF" "CNF"
Design matrix not provided. Switch to the classic mode.
[1] "cnf-cNf"
[1] "cnf" "cNf"
Design matrix not provided. Switch to the classic mode.
[1] "Cnf-CNf"
[1] "Cnf" "CNf"
Design matrix not provided. Switch to the classic mode.
names(DEgenesN) = getOneFactorComparisons("n")
ggVennDiagram(DEgenesN, main = "Differentially expressed genes in reponse to Nitrate")DEgenesFe = list()
for (comp in getOneFactorComparisons("f")) {
print(comp)
genes = getDEgenes(comp)
DEgenesFe[[length(DEgenesFe) + 1]] = genes
}[1] "cNf-cNF"
[1] "cNf" "cNF"
Design matrix not provided. Switch to the classic mode.
[1] "cnf-cnF"
[1] "cnf" "cnF"
Design matrix not provided. Switch to the classic mode.
[1] "Cnf-CnF"
[1] "Cnf" "CnF"
Design matrix not provided. Switch to the classic mode.
[1] "CNf-CNF"
[1] "CNf" "CNF"
Design matrix not provided. Switch to the classic mode.
names(DEgenesFe) = getOneFactorComparisons("f")
ggVennDiagram(DEgenesFe, main = "Differentially expressed genes in reponse to Iron")